
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#include <mpi.h>

#include "imageppm.h"

//! MPI datatype covering the int/char header fields of mpiImageDescriptor.
//! It is built and committed by create_ImageDescriptorDataType().
MPI_Datatype MPI_IMAGE_DESCRIPTOR;

//! Describes one horizontal strip of a single colour channel exchanged between host and clients.
typedef struct {
    int width;              //! Horizontal size: pixels per row (used as the row stride of data)
    int inReal_BeginLine;   //! Row of the original image where the processed rows begin
    int scopeLines;         //! Number of rows held in data
    int scopeLineStart;     //! First row of data to process, in [0, scopeLines)
    int scopeProcessCount;  //! How many rows must be processed; 0 tells a client it is not needed
    char color;             //! Colour channel this strip belongs to (the host uses 1, 2, 3 for R, G, B)

    unsigned char* data;    //! Pixel rows, width bytes each; not part of MPI_IMAGE_DESCRIPTOR, sent separately
} mpiImageDescriptor;

/* Larger of x and y. Every argument and the whole expansion are parenthesised so
 * the macro composes correctly inside bigger expressions. Each argument may be
 * evaluated twice, so do not pass expressions with side effects. */
#define MAX(x,y) ((x) > (y) ? (x) : (y))

/**
 * Box-blur the rows of `src` selected by scopeLineStart/scopeProcessCount into `dst`.
 *
 * For every output pixel, the samples of the (2*smooth_size+1)^2 window centred
 * on it that lie inside the source buffer (rows [0, scopeLines), columns
 * [0, width)) are summed. The sum is always divided by the full window area, so
 * samples falling outside the buffer contribute as zero.
 *
 * @param src          Input strip; width, scopeLines, scopeLineStart,
 *                     scopeProcessCount and data are read.
 * @param dst          Output; only dst->data is written. Its row 0 receives the
 *                     result for source row src->scopeLineStart.
 * @param smooth_size  Window radius in pixels.
 */
void smooth(mpiImageDescriptor* src, mpiImageDescriptor* dst, int smooth_size){

    const int window_side = smooth_size*2+1;
    const int window_area = window_side*window_side;
    const int stride = src->width;
    const int first_row = src->scopeLineStart;
    const int end_row = src->scopeProcessCount + src->scopeLineStart;
    int x, y, row, col;

    for (x = 0; x < stride; ++x){
        // Horizontal extent of the window, clipped to the row
        const int col_lo = (x - smooth_size < 0) ? 0 : x - smooth_size;
        const int col_hi = (x + smooth_size >= stride) ? stride - 1 : x + smooth_size;

        for (y = first_row; y < end_row; ++y){
            // Vertical extent of the window, clipped to the rows present in src
            const int row_lo = (y - smooth_size < 0) ? 0 : y - smooth_size;
            const int row_hi = (y + smooth_size >= src->scopeLines) ? src->scopeLines - 1 : y + smooth_size;
            int acc = 0;

            for (row = row_lo; row <= row_hi; ++row){
                const unsigned char* line = &src->data[row*stride];
                for (col = col_lo; col <= col_hi; ++col){
                    acc += line[col];
                }
            }

            dst->data[x + (y - first_row)*stride] = acc/window_area;
        }
    }

}


/**
 * Host sends each channel of the image to one of the 3 clients.
 * Each client process it's own channel and send the result back to the host when finished.
 * We need exactly 4 processes. (1 Host + 3 Clients)
 */
int handle_host_f1(int group_size, const char *fileIn, const char* fileOut, int smooth_size){

    struct timeval start_all, end_all;
    struct timeval start_exec, end_exec;
    gettimeofday(&start_all, NULL);

    Image *img;
    int i, total_data;
    mpiImageDescriptor descp;
    MPI_Status status;

    // Carregar imagem
    img = load_image(fileIn);
    total_data = img->width * img->height;

    // Result memory
    Image result;
    result.colorsize = img->colorsize;
    result.width = img->width;
    result.height = img->height;

    result.r = calloc(result.colorsize,total_data);
    result.g = calloc(result.colorsize,total_data);
    result.b = calloc(result.colorsize,total_data);

    int processing_units = group_size-1;
//    printf("Hi! I'm the host of %d other processes.\n", processing_units);

    // Base descriptor
    descp.width = img->width;

    // Remove extra cores
    if (img->height/processing_units < 5){
        for (i = (img->height/5)+1; i <= processing_units; ++i){
            descp.scopeProcessCount = 0;
            MPI_Send( &descp, 1, MPI_IMAGE_DESCRIPTOR, i, 1, MPI_COMM_WORLD);
//            printf("HOST: I don't need %d\n", i);
        }
        processing_units = img->height/5;
    }

    int p;
    int messages_sent = 3 * processing_units;
    int size_each = MAX(img->height/processing_units, 5);


    int extra_lines_for_last = img->height%processing_units;

    MPI_Request *responses = malloc(sizeof(MPI_Status)*messages_sent);

    gettimeofday(&start_exec, NULL);

//    printf("HOST: Sending messages for %d, with %d lines +%d for last\n", processing_units, size_each, extra_lines_for_last);
    for (i = 1; i <= 3; ++i){ // RGB -> 1 2 3

        descp.color = i;
        descp.data = getImageChannel(img, descp.color);
        unsigned char* save_at = (unsigned char*) getImageChannel(&result, i);
        for (p = 0; p < processing_units; ++p){ // Enviar um pedaco para cada core

            descp.inReal_BeginLine = p*size_each;

            descp.scopeLineStart = smooth_size;
            if (p == 0){
                descp.scopeLineStart = 0;
            }

            descp.scopeLines = size_each + smooth_size;
            descp.scopeProcessCount = size_each;
            if ( p > 0 && p < processing_units-1 ){ // Meio
                descp.scopeLines += smooth_size;
            } else if ( p == processing_units-1 ){ // Ultima
                descp.scopeLines += extra_lines_for_last;
                descp.scopeProcessCount += extra_lines_for_last;
            }

            // Send data to process
//            printf("HOST: Color %d, process unit %d, TotalLines %d, BeginLine %d, StartAt %d, ProcLines %d lines\n", i, p+1, descp.scopeLines, descp.inReal_BeginLine, descp.scopeLineStart, descp.scopeProcessCount);
            MPI_Send( &descp, 1, MPI_IMAGE_DESCRIPTOR, p+1, i, MPI_COMM_WORLD);
            MPI_Send( &descp.data[descp.width*( descp.inReal_BeginLine-descp.scopeLineStart ) ],
                       descp.width*descp.scopeLines, MPI_UNSIGNED_CHAR, p+1, i*2, MPI_COMM_WORLD);

            // Esperar resposta - Modelo 2 - ASYNC
            MPI_Irecv(&save_at[descp.width*descp.inReal_BeginLine], descp.width*descp.scopeProcessCount,
                      MPI_UNSIGNED_CHAR, p+1, 0, MPI_COMM_WORLD, &responses[ (i-1)*processing_units+p ]);
        }
    }

    // Send START
//    i = 1;
//    MPI_Bcast(&i, 1, MPI_INT, 0, MPI_COMM_WORLD);

    // Free original image
    release_image(img);

//    unsigned char* ptr;
    for (i = 0; i < messages_sent; ++i){

        // Modelo 1 - SYNC start
//        MPI_Recv(&descp, 1, MPI_IMAGE_DESCRIPTOR, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
//        ptr = (unsigned char*) getImageChannel(&result, descp.color);
//        MPI_Recv(&ptr[descp.width*descp.inReal_BeginLine], descp.width*descp.scopeLines, MPI_UNSIGNED_CHAR, status.MPI_SOURCE, 1, MPI_COMM_WORLD, &status);

        // Modelo 2 - ASYNC
//        printf("Waiting for %d\n", i);
        MPI_Wait(&responses[i], &status);
    }

    gettimeofday(&end_exec, NULL);

    // Save the final image
    save_image(&result, fileOut, SAVE_MODE_BINARY);

    // Free memory
    free(responses);
    free(result.r);
    free(result.g);
    free(result.b);

    gettimeofday(&end_all, NULL);

    double result_all = end_all.tv_sec - start_all.tv_sec + (end_all.tv_usec - start_all.tv_usec) / 1000000.0;
    double result_exec = end_exec.tv_sec - start_exec.tv_sec + (end_exec.tv_usec - start_exec.tv_usec) / 1000000.0;
    double result_load = start_exec.tv_sec - start_all.tv_sec + (start_exec.tv_usec - start_all.tv_usec) / 1000000.0;
    double result_ret = end_all.tv_sec - end_exec.tv_sec + (end_all.tv_usec - end_exec.tv_usec) / 1000000.0;

    printf("%lf %lf %lf %lf\n", result_load, result_ret, result_exec, result_all);

    return 0;
}

/**
 * Client side: for each of the 3 colour channels, receive a strip descriptor and
 * its pixel rows from the host, smooth the requested rows and send them back.
 *
 * Protocol for channel index c (0..2): the descriptor arrives with tag c+1, the
 * pixel rows with tag (c+1)*2, and the smoothed rows are returned with tag 0.
 * A descriptor whose scopeProcessCount is 0 means this client is not needed and
 * the function returns immediately.
 *
 * If a buffer cannot be allocated the whole job is aborted with MPI_Abort,
 * because the host would otherwise wait forever for this client's reply.
 *
 * @param my_rank      Rank of this process (used in diagnostics only).
 * @param groupsize    Number of processes in the job (unused).
 * @param smooth_size  Smoothing radius in pixels, forwarded to smooth().
 */
void handle_client(int my_rank, int groupsize, int smooth_size)
{
    const int MAIN_HOST = 0;
    MPI_Status status;
    int channel;

    (void) groupsize;

    for (channel = 0; channel < 3; ++channel){
        mpiImageDescriptor strip = {0};
        mpiImageDescriptor smoothed = {0};

        // Receive the strip description from the host
        MPI_Recv(&strip, 1, MPI_IMAGE_DESCRIPTOR, MAIN_HOST, channel+1, MPI_COMM_WORLD, &status);
        if (strip.scopeProcessCount == 0){
            return;
        }

        const size_t in_bytes = (size_t) strip.width * (size_t) strip.scopeLines;
        const size_t out_bytes = (size_t) strip.width * (size_t) strip.scopeProcessCount;

        strip.data = malloc(in_bytes);
        smoothed.data = malloc(out_bytes);
        // malloc(0) may legitimately return NULL, so only a non-empty request counts as failure
        if ((strip.data == NULL && in_bytes > 0) || (smoothed.data == NULL && out_bytes > 0)){
            fprintf(stderr, "CLIENT %d: out of memory\n", my_rank);
            free(strip.data);
            free(smoothed.data);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
            return;
        }

        // Receive the pixel rows
        MPI_Recv(strip.data, strip.width*strip.scopeLines, MPI_UNSIGNED_CHAR, MAIN_HOST, (channel+1)*2, MPI_COMM_WORLD, &status);

        smoothed.width = strip.width;
        smoothed.color = strip.color;
        smoothed.inReal_BeginLine = strip.inReal_BeginLine;
        smoothed.scopeLines = strip.scopeProcessCount;
        smoothed.scopeLineStart = 0;
        smoothed.scopeProcessCount = strip.scopeProcessCount;

        smooth(&strip, &smoothed, smooth_size);
        free(strip.data);

        // MPI_Send is blocking, so the result buffer can be released right after it
        MPI_Send(smoothed.data, smoothed.width*smoothed.scopeLines, MPI_UNSIGNED_CHAR, MAIN_HOST, 0, MPI_COMM_WORLD);
        free(smoothed.data);
    }

}

void create_ImageDescriptorDataType(){

    const int nitems = 6;
    int blocklengths[6] = {1,1,1,1,1,1};
    MPI_Datatype types[6] = {MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_UNSIGNED_CHAR};
    MPI_Aint offsets[6];

    offsets[0] = offsetof(mpiImageDescriptor, width);
    offsets[1] = offsetof(mpiImageDescriptor, inReal_BeginLine);
    offsets[2] = offsetof(mpiImageDescriptor, scopeLines);
    offsets[3] = offsetof(mpiImageDescriptor, scopeLineStart);
    offsets[4] = offsetof(mpiImageDescriptor, scopeProcessCount);
    offsets[5] = offsetof(mpiImageDescriptor, color);

    MPI_Type_create_struct(nitems, blocklengths, offsets, types, &MPI_IMAGE_DESCRIPTOR);
    MPI_Type_commit(&MPI_IMAGE_DESCRIPTOR);
}

/**
 * argv[0] - Program name
 * argv[1] - Input image file name
 * argv[2] - Distribution function
 *              0 -> RGB channels
 */
int main(int argc, char *argv[])
{
    int smooth_size = 2;
	int comm_group_size;
    int my_rank;

	if ( MPI_Init(&argc, &argv) != MPI_SUCCESS ){
        printf("MPI_INIT failed\n");
        return -1;
	}

	create_ImageDescriptorDataType();

    MPI_Comm_size(MPI_COMM_WORLD, &comm_group_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    if (my_rank == 0){
        if (handle_host_f1(comm_group_size, argv[1], argv[2], smooth_size) < 0){
            printf("OPS! Host failed!\n");
        }
    } else {
        handle_client(my_rank, comm_group_size, smooth_size);
    }

    MPI_Finalize();

	return 0;
}
